library(plotly)
## Warning: package 'plotly' was built under R version 4.2.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.1
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(zoo)
## Warning: package 'zoo' was built under R version 4.2.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
library(DSIWastewater)
#load WasteWater_data into the environment
data(WasteWater_data, package = "DSIWastewater")
#get DF into format for buildRegressionEstimateTable
baseWaste_DF <- buildWasteAnalysisDF(WasteWater_data)
baseWaste_DF$site <- ifelse(baseWaste_DF$site == "Madison MSD WWTF",
"Madison", baseWaste_DF$site)
baseWaste_DF <- baseWaste_DF[!(baseWaste_DF$site %in% c("Portage WWTF","Cedarburg WWTF")),]
baseWaste_DF <- baseWaste_DF[baseWaste_DF$n > 10,]
K=3
baseWaste_DF <- baseWaste_DF%>%
group_by(site)%>%
arrange(site, date)%>%
#create K day mean of the same column to use later
mutate(pastKavg.wwlog10 = rollmean(sars_cov2_adj_load_log10,
K, align = "right",
fill=NA))%>%
ungroup()
#add quantile data to merge with the regression results
Quantiles_DF <- makeQuantileColumns(baseWaste_DF,
5:9/10, c(14, 30, 60, 90),
"sars_cov2_adj_load_log10")
Quantiles_DF <- Quantiles_DF[,c("site", "date", "window", "quant", "ntile", "pastKavg.wwlog10")]
Quantiles_DF <- tidyr::pivot_wider(Quantiles_DF,
names_from = c(window, quant),
values_from = c(ntile))
#Get 5 day rolling regression of data
CDCMethod <- buildRegressionEstimateTable(baseWaste_DF,
PSigTest=FALSE)
CDCMethod <- CDCMethod[,c("date","site", "modeled_percentchange", "lmreg_sig")]
#merge the regression DF and the quantile DF to get info for
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
FULL_reg_DF <- left_join(CDCMethod, Quantiles_DF,
by = c("site", "date"))%>%
tidyr::pivot_longer(cols = '14_0.5':'90_0.9',
names_to = c("window", "quant"),
values_to = "ntile",
names_sep = "_")#
#create flags described in @return
FULL_reg_DF <- classifyQuantileFlagRegression(FULL_reg_DF)
#return only flags and type columns
Full_wasteFlags <- FULL_reg_DF[,c("site", "date",
"window", "quant",
"cdc_flag",
"flag_ntile",
"flag_ntile_Pval")]
data(Case_data, package = "DSIWastewater")
Case_DF <- Case_data
#get the case flags
Case_DF <- buildCaseAnalysisDF(Case_DF)
CaseRegressionOutput <- buildRegressionEstimateTable(DataMod = Case_DF,
RunOn = c("FirstConfirmed.Per100K", "pastwk.avg.casesperday.Per100K"),
SplitOn = "site", DaysRegressed = 7)
case_flags_names <- c("case_flag",
"case_flag_plus_comm.threshold",
"slope_switch_flag")
CaseRegressionOutput$Method <- ifelse(CaseRegressionOutput$Method ==
"FirstConfirmed.Per100K",
"Cases", "7DayCases")
library(dplyr)
#Classify slope to create 3 flags described in @return
CaseFlags <- classifyCaseRegression(CaseRegressionOutput)%>%
select(Method, site, date,all_of(case_flags_names))%>%
tidyr:::pivot_wider(names_from = "Method", values_from = all_of(case_flags_names))
Full_wasteFlags <- Full_wasteFlags%>%
rename(cdc.flag = cdc_flag,
flag.ntile = flag_ntile,
flag.ntile.Pval = flag_ntile_Pval)%>%
tidyr::pivot_wider(names_from = c(window, quant),
values_from = c(cdc.flag, flag.ntile, flag.ntile.Pval))
Flag_DF <- full_join(CaseFlags, Full_wasteFlags,
by = c("site", "date"))
date_Flag_DF <- DF_date_vector(Flag_DF, "date",
names(Flag_DF)[3:68])
#"case_flag_Cases" "case_flag_7DayCases"
#"case_flag_plus_comm.threshold_Cases" "case_flag_plus_comm.threshold_7DayCases"
#"slope_switch_flag_Cases" "slope_switch_flag_7DayCases"
dep_flags <- names(Flag_DF)[9:68]
edgeThresh <- 7
CaseFlag <- "slope_switch_flag_Cases"
DateDistDF <- date_distance_calc(date_Flag_DF, CaseFlag,
dep_flags, edge = edgeThresh)%>%
select(site, date, dep_flags)%>%
tidyr::pivot_longer(cols = dep_flags,
names_to = c("FlagType","window", "quant"),
values_to = "FlagError",
names_sep = "_")%>%
mutate(window = as.numeric(window), quant = as.numeric(quant))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(dep_flags)` instead of `dep_flags` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
<Above has been done before by peter. the next section is where we hope to show value>
CaseNumberFlags <- sum(Flag_DF[[CaseFlag]], na.rm = TRUE)
Flag_DF%>%
group_by(site)%>%
summarise(across(c(dep_flags, !!sym(CaseFlag)), ~sum(.x, na.rm=TRUE)))%>%
mutate(across(c(dep_flags), ~(.x-!!sym(CaseFlag))))%>%
ungroup()%>%
summarise(across(c(dep_flags), ~sum(abs(.x), na.rm=TRUE)))%>%
tidyr::pivot_longer(cols = dep_flags,
names_to = c("FlagType","window", "quant"),
values_to = "TotalFlagCountDiff",
names_sep = "_")%>%
arrange(TotalFlagCountDiff)
## # A tibble: 60 × 4
## FlagType window quant TotalFlagCountDiff
## <chr> <chr> <chr> <dbl>
## 1 flag.ntile.Pval 30 0.5 968
## 2 flag.ntile.Pval 30 0.6 983
## 3 flag.ntile 30 0.6 984
## 4 flag.ntile 30 0.5 1009
## 5 flag.ntile 60 0.5 1009
## 6 flag.ntile 60 0.6 1016
## 7 flag.ntile 90 0.5 1017
## 8 flag.ntile 30 0.7 1028
## 9 flag.ntile 90 0.6 1033
## 10 flag.ntile 60 0.7 1047
## # … with 50 more rows
DistSummary <- DateDistDF%>%
group_by(window, quant, FlagType)%>%
summarise(Mean = mean(FlagError, na.rm = TRUE),
MeanErrorSquard = mean(FlagError^2, na.rm = TRUE),
Var = var(FlagError, na.rm = TRUE),
n = sum(!is.na(FlagError)),
Missed = mean(FlagError == edgeThresh, na.rm = TRUE))
## `summarise()` has grouped output by 'window', 'quant'. You can override using
## the `.groups` argument.
Plot_DF_List <- split(DistSummary, DistSummary$FlagType)
plot_ly(showlegend = TRUE, opacity = .8)%>%
add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
z = Plot_DF_List[[1]]$Mean, name = "cdc.flag", type="mesh3d")%>%
add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
z = Plot_DF_List[[2]]$Mean, name = "flag.ntile", type="mesh3d")%>%
add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
z = Plot_DF_List[[3]]$Mean, name = "flag.ntile.Pval", type="mesh3d")%>%
layout(scene= list(xaxis = list(title = 'window'),
yaxis = list(title = 'quant'),
zaxis = list(title = 'Mean')))
plot_ly(showlegend = TRUE, opacity = .8)%>%
add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
z = Plot_DF_List[[1]]$MeanErrorSquard,
name = "cdc.flag", type="mesh3d")%>%
add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
z = Plot_DF_List[[2]]$MeanErrorSquard,
name = "flag.ntile", type="mesh3d")%>%
add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
z = Plot_DF_List[[3]]$MeanErrorSquard,
name = "flag.ntile.Pval", type="mesh3d")%>%
layout(scene= list(xaxis = list(title = 'window'),
yaxis = list(title = 'quant'),
zaxis = list(title = 'MeanErrorSquard')))
plot_ly(showlegend = TRUE, opacity = .8)%>%
add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
z = Plot_DF_List[[1]]$Var, name = "cdc.flag", type="mesh3d")%>%
add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
z = Plot_DF_List[[2]]$Var, name = "flag.ntile", type="mesh3d")%>%
add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
z = Plot_DF_List[[3]]$Var, name = "flag.ntile.Pval", type="mesh3d")%>%
layout(scene= list(xaxis = list(title = 'window'),
yaxis = list(title = 'quant'),
zaxis = list(title = 'Var')))
plot_ly(showlegend = TRUE, opacity = .8)%>%
add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
z = Plot_DF_List[[1]]$Missed, name = "cdc.flag", type="mesh3d")%>%
add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
z = Plot_DF_List[[2]]$Missed, name = "flag.ntile",
type="mesh3d")%>%
add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
z = Plot_DF_List[[3]]$Missed,
name = "flag.ntile.Pval", type="mesh3d")%>%
layout(scene= list(xaxis = list(title = 'window'),
yaxis = list(title = 'quant'),
zaxis = list(title = 'Missed')))
plot_ly(showlegend = TRUE, opacity = .8)%>%
add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
z = Plot_DF_List[[1]]$n, name = "cdc.flag", type="mesh3d")%>%
add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
z = Plot_DF_List[[2]]$n, name = "flag.ntile", type="mesh3d")%>%
add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
z = Plot_DF_List[[3]]$n, name = "flag.ntile.Pval", type="mesh3d")%>%
layout(scene= list(xaxis = list(title = 'window'),
yaxis = list(title = 'quant'),
zaxis = list(title = 'n')))
DateDistDF%>%
ggplot()+
stat_count(aes(x = FlagError, fill = FlagType,
y = ..prop..),
position = "dodge")+
facet_grid(window~quant)
## Warning: Removed 2035987 rows containing non-finite values (stat_count).
DistSummary%>%
arrange(MeanErrorSquard)
## # A tibble: 60 × 8
## # Groups: window, quant [20]
## window quant FlagType Mean MeanErrorSquard Var n Missed
## <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <int> <dbl>
## 1 90 0.9 flag.ntile.Pval 0.0120 3 3.01 334 0.0240
## 2 14 0.9 flag.ntile.Pval -0.172 3.14 3.22 29 0.0345
## 3 90 0.9 flag.ntile 0.0312 3.24 3.24 416 0.0264
## 4 14 0.9 flag.ntile 0.0678 3.25 3.31 59 0.0339
## 5 60 0.9 flag.ntile.Pval -0.0580 3.58 3.59 293 0.0239
## 6 90 0.8 flag.ntile.Pval -0.0382 4.16 4.16 550 0.0327
## 7 60 0.9 flag.ntile -0.118 4.34 4.33 357 0.0280
## 8 90 0.8 flag.ntile -0.0252 4.38 4.38 714 0.0350
## 9 60 0.8 flag.ntile.Pval -0.160 4.96 4.94 594 0.0303
## 10 90 0.7 flag.ntile.Pval -0.0328 5.02 5.02 671 0.0387
## # … with 50 more rows
A <- DistSummary%>%
ggplot(aes(x = n, y = MeanErrorSquard, color = paste(window, quant)))+
geom_point()+
geom_vline(xintercept = CaseNumberFlags)
ggplotly(A)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
B <- DistSummary%>%
ggplot(aes(x = n, y = Var, color = paste(window, quant)))+
geom_point()+
geom_vline(xintercept = CaseNumberFlags)
ggplotly(B)
#peters method to select outliers
SitePop <- baseWaste_DF[,c("site","population_served")]%>%
group_by(site)%>%
summarise(mean_pop_served = mean(population_served, na.rm = TRUE))
flag_Model_DF = DateDistDF %>%
left_join(SitePop)%>%
mutate(abs_difference = abs(FlagError),
inverse_difference = 1/(FlagError+0.01)*100) %>%
# Filter out differences >30
filter(abs_difference <=30)
## Joining, by = "site"
testmodel = lm(inverse_difference ~ quant + window + date + mean_pop_served,
data = flag_Model_DF[which(!is.na(flag_Model_DF$inverse_difference)),])
summary(testmodel)
##
## Call:
## lm(formula = inverse_difference ~ quant + window + date + mean_pop_served,
## data = flag_Model_DF[which(!is.na(flag_Model_DF$inverse_difference)),
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6777 -6063 3546 3877 5107
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.631e+04 2.386e+03 11.029 < 2e-16 ***
## quant 5.255e+02 1.398e+02 3.759 0.000171 ***
## window 1.410e+00 6.739e-01 2.093 0.036392 *
## date -1.085e+00 1.268e-01 -8.558 < 2e-16 ***
## mean_pop_served -1.758e-03 1.245e-04 -14.121 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4859 on 64488 degrees of freedom
## Multiple R-squared: 0.005071, Adjusted R-squared: 0.005009
## F-statistic: 82.17 on 4 and 64488 DF, p-value: < 2.2e-16